/********************************R&D Investment around AIPA********************************
Main purpose: identify US patents with EP or foreign parallel applications
datasets:
historical_masterfile.dta: US patents, which was downloaded from https://www.uspto.gov/ip-policy/economic-research/research-datasets/historical-patent-data-files on 5/29/2016
patents.dta: the link between patent and public firm assignees (permno) as developed by Kogan et al. (2017),  downloaded from Noah Stoffman's website (https://host.kelley.iu.edu/nstoffma/) on 4/13/2016
sale.dta: universe of Compustat/CRSP firms since 1970 as of the data download date (July 21, 2018)
********************************R&D Investment around AIPA********************************/



 
*~~~~~~~~~~~~~~Import Usercost of RD~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
import delimited "$DATA\RDusercost_clean.csv", clear 

save "$DATA\RDusercost", replace

 
 
 ***************compute app-grant lag by uspc***************
use "$DATA\historical_masterfile",clear
destring patent, gen(patnum) force 
keep if patnum>=1 & patnum!=.
drop patent
keep if appl_dt>=td(1jan1998) & appl_dt<td(29nov2000)
gen gap=disp_dt-appl_dt
bys uspc: egen gap3=median(gap)
keep uspc gap3
duplicates drop
drop if missing(uspc)
saveold "$DATA\gap3_9800", replace version(12)

****************compute firm exposure to application-grant delays by USPC *****************
use "$DATA\historical_masterfile",clear
destring patent, gen(patnum) force 
keep if patnum>=1 & patnum!=.
drop patent
keep if appl_dt>=td(1jan1998) & appl_dt<td(29nov2000)
joinby patnum using "$DATA\patents"
drop if missing(permno)
bys permno: gen num=_N
bys permno uspc: gen uspc_num=_N
gen uspc_pct=uspc_num/num
drop *num
keep permno uspc uspc_pct
duplicates drop
joinby uspc using "$DATA\gap3_9800"
gen gap_temp=gap*uspc_pct
bys permno: egen gap_wt3=total(gap_temp)
keep permno  gap_wt3
duplicates drop
saveold "$DATA\gap_wt3", replace version(12)





*~~~~~~~~~~~~~~~~Data from Compustat/CRSP firms ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
//sale.dta: universe of Compustat/CRSP firms since 1970 as of the data download date (July 21, 2018)
use "$DATA\sale",clear
rename _all, lower
capture rename variabl0 lt
gen year=year(datadate)
sort gvkey fyear
by gvkey: replace siccd=siccd[_n-1] if missing(siccd)
gsort gvkey -fyear
by gvkey: replace siccd=siccd[_n-1] if missing(siccd)
sort gvkey fyear
by gvkey: replace sich=sich[_n-1] if missing(sich)
gsort gvkey -fyear
by gvkey: replace sich=sich[_n-1] if missing(sich)
sort gvkey fyear
replace siccd=sich if siccd==.


***drop finance and utility industry
drop if siccd>=6000 & siccd<=6999  
drop if siccd>=4900 & siccd< 5000 
***drop finance and utility industry

***drop negative sales or assets
drop if at<0
drop if sale<0
drop if ceq<0 

**drop firms that cannot merge with CRSP
drop if missing(permno)

*actively trading firms: list of permno-years with 200 trading days per year
joinby permno year using  permno_trade200, unmatched(master)
tab _merge
keep if _merge==3
drop _merge


**potential sample period
keep if year<=2008 & year>=1994
duplicates drop permno fyear, force //1 obs


gen size=ln(at)
gen rd=xrd/at
replace rd=0 if rd==. & at!=.
gen ad=xad/at
replace ad=0 if ad==. & at!=.
gen capxat=capx/at
replace capxat=0 if capxat==.
egen debt=rowtotal(dltt dlc)
gen lev=debt/at
gen lev2=lt/at
gen roa=ib/at
gen loss=(ib<0)
egen roa2=rowtotal(ib xrd)
replace roa2=roa2/at
replace roa2=. if roa==. //35 chgs
gen loss2=(roa2<0) 
replace retf=retf-1 //composite return
replace retc=retc-1
gen mve=prcc_f*csho
gen mtb=mve/ceq
egen tobinq=rowtotal(mve debt)
egen tobinq2=rowtotal(mve lt)
replace tobinq=tobinq/at
replace tobinq2=tobinq2/at
drop debt
gen capint=ppent/at
gen sic2=int(siccd/100)
gen sic3=int(siccd/10)


label var xrd "R&D expenses" //xrd has lots of missing value
label var rd "xrd/at" //missing rd is treated as 0
label var lev "book leverage"
label var tobinq "total(mve,debt)/at)" //debt is  long-term debt plus short-term debt
label var tobinq2 "total(mve,lt)/at)" //lt is total liability
label var lev "debt/at"
label var lev2 "lt/at"
label var roa "ib/at"
label var roa2 "ROA before R&D expenses"
label var capint "capital intensity, ppent/at"
sort permno fyear

*get headquarter state info 
joinby gvkey fyear using "$DATA\hdq_state", unmatched(master)
tab _merge 
drop _merge 
save "$DATA\rd_int", replace 






**************************Analyses**************************
use "$DATA\rd_int", clear 
*exclude obs with missing variables*
drop if size*rd*capxat*lev*roa2*tobinq==.  


***********regression period: 1998-2003*********** 
keep if year<=2003 & year>=1998
***********regression period: 1998-2003*********** 



*post-AIPA dummy
gen post2=(datadate>=td(29nov2000))

gen rd_sale=xrd/sale
replace rd_sale=0 if xrd==.

gen rd_sale_raw=rd_sale
sum sale, det
sum rd_sale_raw , det
sum rd_sale_raw if sale>10 , det //Outliers driven by firms with less than 10m in sales

winsor2 rd rd_sale tobinq roa2 lev //Winsorize outliers
 

gen lnxrd=ln(1+xrd) //xrd was measured in million
replace lnxrd=0 if lnxrd==.  //replace missing value as 0



*****************get firm-specific exposure to application-grant delay*****************
joinby permno using "$DATA\gap_wt3", unmatched(master)
tab _merge 
drop _merge

 
replace gap_wt3=0 if gap_wt3==.
replace gap_wt3=0 if gap_wt3<18*30.5
replace gap_wt3=gap_wt3/365.25


 
 
*----------------Table 12------------------------* 
areg lnxrd i.post2##c.gap_wt3 size tobinq_w roa2_w loss lev_w i.year if sale>=10 &  year>=1998 & year<=2003, absorb(permno) vce(cluster permno)
outreg2 using "$DATA\Table_12.xls" , ctitle(lnxrd, cluster permno, absorb permno, include gap_wt3 zeros, 1998 to 2003 sale geq 10)  excel replace

areg lnxrd i.post2##c.gap_wt3 size tobinq_w roa2_w loss lev_w i.year if sale>=20 &  year>=1998 & year<=2003, absorb(permno) vce(cluster permno)
outreg2 using "$DATA\Table_12.xls", ctitle(lnxrd, cluster permno, absorb permno, include gap_wt3 zeros, 1998 to 2003 sale geq 20)  excel append
  
areg lnxrd i.post2##c.gap_wt3 size tobinq_w roa2_w loss lev_w i.year if sale>=50 &  year>=1998 & year<=2003, absorb(permno) vce(cluster permno)
outreg2 using "$DATA\Table_12.xls" , ctitle(lnxrd, cluster permno, absorb permno, include gap_wt3 zeros, 1998 to 2003 sale geq 50)  excel append
 
areg rd_sale i.post2##c.gap_wt3 size tobinq_w roa2_w loss lev_w i.year if sale>=10 &  year>=1998 & year<=2003, absorb(permno) vce(cluster permno)
outreg2 using "$DATA\Table_12.xls" , ctitle(rd_sale, cluster permno, absorb permno, include gap_wt3 zeros, 1998 to 2003 sale geq 10)  excel append

areg rd_sale i.post2##c.gap_wt3 size tobinq_w roa2_w loss lev_w i.year if sale>=20 &  year>=1998 & year<=2003, absorb(permno) vce(cluster permno)
outreg2 using "$DATA\Table_12.xls" , ctitle(rd_sale, cluster permno, absorb permno, include gap_wt3 zeros, 1998 to 2003 sale geq 20)  excel append

areg rd_sale i.post2##c.gap_wt3 size tobinq_w roa2_w loss lev_w i.year if sale>=50 &  year>=1998 & year<=2003, absorb(permno) vce(cluster permno)
outreg2 using "$DATA\Table_12.xls" , ctitle(rd_sale, cluster permno, absorb permno, include gap_wt3 zeros, 1998 to 2003 sale geq 50)  excel append
  

  
  
  
*----------------Table H1 --------------------*
file open xd using "$DATA\table_H1.txt",  replace write
file write xd  "Variable" _tab "Sample"  _tab "Mean"   _tab "SD" _tab  "N" _n

foreach var in rd_sale lnxrd gap_wt3 {    
	sum `var'  if  year>=1998 & year<=2003 , det
	global  round_n=round(`r(N)'/1000)*1000
	file write  xd %12s "`var'" _tab %12s "if  year>=1998 & year<=2003" _tab  %12.3f  (r(mean))  _tab   %12.3f  (r(sd)) _tab   %12.3f (`r(N)') _n  
  	sum `var'  if sale>=10 &  year>=1998 & year<=2003 , det
	global  round_n=round(`r(N)'/1000)*1000
	file write  xd %12s "`var'" _tab %12s "if sale>10 &  year>=1998 & year<=2003" _tab  %12.3f  (r(mean))  _tab   %12.3f  (r(sd)) _tab   %12.3f (`r(N)') _n  
  	sum `var'  if sale>=50 &  year>=1998 & year<=2003 , det 
	file write  xd %12s "`var'" _tab %12s "if sale>50 &  year>=1998 & year<=2003" _tab  %12.3f  (r(mean))  _tab   %12.3f  (r(sd)) _tab   %12.3f (`r(N)') _n  
 } 
 
file  close xd    
  
  
  
  
  
*----------------Table H2------------------------*   
*gap expsure for long versus short technology cycle, respectively	 
joinby permno using "$DATA\gap_wt3techcycle", unmatched(master)
tab _merge 
drop _merge 

preserve 
replace gap_wt3techcycle1=0 if gap_wt3techcycle1==.
replace gap_wt3techcycle1=0 if gap_wt3techcycle1<18*30.5
replace gap_wt3techcycle1=gap_wt3techcycle1/365.25

replace gap_wt3techcycle0=0 if gap_wt3techcycle0==.
replace gap_wt3techcycle0=0 if gap_wt3techcycle0<18*30.5
replace gap_wt3techcycle0=gap_wt3techcycle0/365.25
 
 reghdfe lnxrd i.post2##c.gap_wt3techcycle1  i.post2##c.gap_wt3techcycle0 size tobinq_w roa2_w loss lev_w if sale>=10, absorb(permno year) vce(cluster permno)
indeplist
outreg2 using "$DATA\tableh1.xls",bdec(3) keep(`r(X)')
reghdfe lnxrd i.post2##c.gap_wt3techcycle1  i.post2##c.gap_wt3techcycle0 size tobinq_w roa2_w loss lev_w if sale>=20, absorb(permno year) vce(cluster permno)
indeplist
outreg2 using "$DATA\tableh1.xls",bdec(3) keep(`r(X)')
reghdfe lnxrd i.post2##c.gap_wt3techcycle1  i.post2##c.gap_wt3techcycle0 size tobinq_w roa2_w loss lev_w if sale>=50, absorb(permno year) vce(cluster permno)
indeplist
outreg2 using "$DATA\tableh1.xls",bdec(3) keep(`r(X)')
restore 



	 
*gap expsure for high- versus low opt-out USPC, respectively	 
joinby permno using "$DATA\gap_wt3optout", unmatched(master)
tab _merge 
drop _merge 

preserve 
replace gap_wt3optout1=0 if gap_wt3optout1==.
replace gap_wt3optout1=0 if gap_wt3optout1<18*30.5
replace gap_wt3optout1=gap_wt3optout1/365.25

replace gap_wt3optout0=0 if gap_wt3optout0==.
replace gap_wt3optout0=0 if gap_wt3optout0<18*30.5
replace gap_wt3optout0=gap_wt3optout0/365.25


reghdfe lnxrd i.post2##c.gap_wt3optout1  i.post2##c.gap_wt3optout0 size tobinq_w roa2_w loss lev_w if sale>=10, absorb(permno year) vce(cluster permno)
indeplist
outreg2 using "$DATA\tableh1.xls",bdec(3) keep(`r(X)')
reghdfe lnxrd i.post2##c.gap_wt3optout1  i.post2##c.gap_wt3optout0 size tobinq_w roa2_w loss lev_w if sale>=20, absorb(permno year) vce(cluster permno)
indeplist
outreg2 using "$DATA\tableh1.xls",bdec(3) keep(`r(X)')
reghdfe lnxrd i.post2##c.gap_wt3optout1  i.post2##c.gap_wt3optout0 size tobinq_w roa2_w loss lev_w if sale>=50, absorb(permno year) vce(cluster permno)
indeplist
outreg2 using "$DATA\tableh1.xls",bdec(3) keep(`r(X)')
restore 





*gap expsure for high- versus low similarity, respectively	 
joinby permno using "$DATA\gap_wt3sim", unmatched(master)
tab _merge 
drop _merge 

preserve 
replace gap_wt3sim1=0 if gap_wt3sim1==.
replace gap_wt3sim1=0 if gap_wt3sim1<18*30.5
replace gap_wt3sim1=gap_wt3sim1/365.25

replace gap_wt3sim0=0 if gap_wt3sim0==.
replace gap_wt3sim0=0 if gap_wt3sim0<18*30.5
replace gap_wt3sim0=gap_wt3sim0/365.25

 reghdfe lnxrd i.post2##c.gap_wt3sim1  i.post2##c.gap_wt3sim0 size tobinq_w roa2_w loss lev_w if sale>=10, absorb(permno year) vce(cluster permno)
indeplist
outreg2 using "$DATA\tableh1.xls",bdec(3) keep(`r(X)')
reghdfe lnxrd i.post2##c.gap_wt3sim1  i.post2##c.gap_wt3sim0 size tobinq_w roa2_w loss lev_w if sale>=20, absorb(permno year) vce(cluster permno)
indeplist
outreg2 using "$DATA\tableh1.xls",bdec(3) keep(`r(X)')
reghdfe lnxrd i.post2##c.gap_wt3sim1  i.post2##c.gap_wt3sim0 size tobinq_w roa2_w loss lev_w if sale>=50, absorb(permno year) vce(cluster permno)
indeplist
outreg2 using "$DATA\tableh1.xls",bdec(3) keep(`r(X)')
rm "$DATA\tableh1.txt"
restore 
  
  


*----------------- Figure H.1---------------------------*

use "$DATA\rd_int", clear 
*exclude obs with missing variables*
drop if size*rd*capxat*lev*roa2*tobinq==. 


***********sample construction*********** 
keep if year<=2003 & year>=1997
***********sample construction***********

*post-AIPA dummy
gen post2=(datadate>=td(29nov2000))

gen rd_sale=xrd/sale
replace rd_sale=0 if xrd==.

gen rd_sale_raw=rd_sale
sum sale, det
sum rd_sale_raw , det
sum rd_sale_raw if sale>10 , det //Outliers driven by firms with less than 10m in sales

winsor2 rd rd_sale tobinq roa2 lev //Winsorize outliers
 
gen ln_rd_sale=log(rd_sale+1)

gen lnxrd=ln(1+xrd) //xrd was measured in million
replace lnxrd=0 if lnxrd==.  //replace missing value as 0


 

*****************get firm-specific exposure to application-grant delay*****************
joinby permno using "$DATA\gap_wt3", unmatched(master)
tab _merge 
drop _merge

 
replace gap_wt3=0 if gap_wt3==.
replace gap_wt3=0 if gap_wt3<18*30.5
replace gap_wt3=gap_wt3/365.25

xtile gap_wt3_cat2=gap_wt3, n(5)
sum gap_wt3 if gap_wt3_cat2==1
sum gap_wt3 if gap_wt3_cat2==5
  

areg lnxrd  size tobinq_w roa2_w loss lev_w i.year if sale>=10 &  year>=1997 & year<=2003 & gap_wt3_cat2==1, absorb(permno) vce(cluster permno)
outreg2 using "$DATA\user_cost_results_explore_rd_intensity_pretrend2_1997_2003.xls" , ctitle(lnxrd, cluster permno, absorb permno, include gap_wt3 zeros, 1997 to 2003 sale geq 10)  excel replace
 
areg  lnxrd    size tobinq_w roa2_w loss lev_w i.year if sale>=10 &  year>=1997 & year<=2003 & gap_wt3_cat2==5, absorb(permno) vce(cluster permno)
outreg2 using "$DATA\user_cost_results_explore_rd_intensity_pretrend2_1997_2003.xls", ctitle(lnxrd , cluster permno, absorb permno, include gap_wt3 zeros, 1997 to 2003 sale geq 10)  excel append
 
